name: 100 Nodes Scale Test

on:
  schedule:
    - cron: '39 0 * * 1-5'

permissions:
  # To be able to access the repository with actions/checkout
  contents: read
  # To be able to request the JWT from GitHub's OIDC provider
  id-token: write

concurrency:
  # Structure:
  # - Workflow name
  # - Event type
  # - A unique identifier depending on event type:
  #   - schedule: SHA
  #   - workflow_dispatch: PR number
  #
  # This structure ensures a unique concurrency group name is generated for each
  # type of testing, such that re-runs will cancel the previous run.
  group: |
    ${{ github.workflow }}
    ${{ github.event_name }}
    ${{
      (github.event_name == 'schedule' && github.sha) ||
      (github.event_name == 'workflow_dispatch' && github.event.inputs.PR-number)
    }}
  cancel-in-progress: true

env:
  # renovate: datasource=golang-version depName=go
  go_version: 1.22.0
  # Adding k8s.local to the end makes kops happy-
  # has stricter DNS naming requirements.
  test_name: scale-100
  cluster_base_name: ${{ github.run_id }}-${{ github.run_attempt }}.k8s.local

jobs:
  install-and-scaletest:
    runs-on: ubuntu-latest
    name: Install and Scale Test
    timeout-minutes: 150
    steps:
      - name: Checkout context ref (trusted)
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          ref: ${{ inputs.context-ref || github.sha }}
          persist-credentials: false

      - name: Set Environment Variables
        uses: ./.github/actions/set-env-variables

      - name: Get Cilium's default values
        id: default_vars
        uses: ./.github/actions/helm-default
        with:
          image-tag: ${{ github.sha }}

      - name: Set up job variables
        id: vars
        run: |
          SHA="${{ github.sha }}"

          # Setup Cilium install options
          CILIUM_INSTALL_DEFAULTS="${{ steps.default_vars.outputs.cilium_install_defaults }} \
            --wait=false"

          CLUSTER_NAME="${{ env.test_name }}-${{ env.cluster_base_name }}"

          echo SHA=${SHA} >> $GITHUB_OUTPUT
          echo cilium_install_defaults=${CILIUM_INSTALL_DEFAULTS} >> $GITHUB_OUTPUT
          echo CLUSTER_NAME=${CLUSTER_NAME} >> $GITHUB_OUTPUT

      - name: Wait for images to be available
        timeout-minutes: 30
        shell: bash
        run: |
          for image in cilium-ci operator-generic-ci hubble-relay-ci ; do
            until docker manifest inspect quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/$image:${{ steps.vars.outputs.SHA }} &> /dev/null; do sleep 45s; done
          done

      - name: Install Go
        uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
        with:
          go-version: ${{ env.go_version }}

      - name: Install Cilium CLI
        uses: cilium/cilium-cli@7306e3cdc6caee738157f08e3e1ba26179f104e5 # v0.15.23
        with:
          repository: ${{ env.CILIUM_CLI_RELEASE_REPO }}
          release-version: ${{ env.CILIUM_CLI_VERSION }}

      - name: Install Kops
        uses: cilium/scale-tests-action/install-kops@238d773bd07754bfd693a6b22c94eddf3a12778d # main

      - name: Setup gcloud credentials
        uses: google-github-actions/auth@a6e2e39c0a0331da29f7fd2c2a20a427e8d3ad1f # v2.1.1
        with:
          workload_identity_provider: ${{ secrets.GCP_PERF_WORKLOAD_IDENTITY_PROVIDER }}
          service_account: ${{ secrets.GCP_PERF_SA }}
          create_credentials_file: true
          export_environment_variables: true

      - name: Setup gcloud CLI
        uses: google-github-actions/setup-gcloud@98ddc00a17442e89a24bbf282954a3b65ce6d200 # v2.1.0
        with:
          project_id: ${{ secrets.GCP_PERF_PROJECT_ID }}
          version: "405.0.0"

      - name: Clone ClusterLoader2
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          repository: kubernetes/perf-tests
          ref: 82036a7e9bdb803b5b133c2d83cfec710819f9ab # master
          persist-credentials: false
          sparse-checkout: clusterloader2
          path: perf-tests

      - name: Display version info of installed tools
        run: |
          echo "--- go ---"
          go version
          echo "--- cilium-cli ---"
          cilium version --client
          echo "--- kops ---"
          ./kops version
          echo "--- gcloud ---"
          gcloud version

      - name: Deploy cluster
        id: deploy-cluster
        uses: cilium/scale-tests-action/create-cluster@238d773bd07754bfd693a6b22c94eddf3a12778d # main
        timeout-minutes: 30
        with:
          cluster_name: ${{ steps.vars.outputs.cluster_name }}
          control_plane_size: n1-standard-8
          control_plane_count: 1
          node_size: e2-medium
          node_count: 100
          kops_state: ${{ secrets.GCP_PERF_KOPS_STATE_STORE }}
          project_id: ${{ secrets.GCP_PERF_PROJECT_ID }}

      - name: Create Instance Group for resource heavy deployments
        uses: cilium/scale-tests-action/create-instance-group@238d773bd07754bfd693a6b22c94eddf3a12778d # main
        timeout-minutes: 30
        with:
          cluster_name: ${{ steps.vars.outputs.cluster_name }}
          node_size: e2-standard-8
          node_count: 1
          ig_name: heapster
          kops_state: ${{ secrets.GCP_PERF_KOPS_STATE_STORE }}

      - name: Setup firewall rules
        uses: cilium/scale-tests-action/setup-firewall@238d773bd07754bfd693a6b22c94eddf3a12778d # main
        with:
          cluster_name: ${{ steps.vars.outputs.cluster_name }}

      - name: Install Cilium
        run: |
          cilium install ${{ steps.vars.outputs.cilium_install_defaults }}

      - name: Wait for cluster to be ready
        uses: cilium/scale-tests-action/validate-cluster@238d773bd07754bfd693a6b22c94eddf3a12778d # main
        timeout-minutes: 20
        with:
          cluster_name: ${{ steps.vars.outputs.cluster_name }}
          kops_state: ${{ secrets.GCP_PERF_KOPS_STATE_STORE }}

      - name: Wait for Cilium status to be ready
        run: |
          cilium status --wait

      - name: Run CL2
        id: run-cl2
        working-directory: ./perf-tests/clusterloader2
        shell: bash
        timeout-minutes: 30
        run: |
          mkdir ./report
          export CL2_PROMETHEUS_PVC_ENABLED=false
          export CL2_ENABLE_PVS=false
          export CL2_ENABLE_NETWORKPOLICIES=true
          export CL2_ALLOWED_SLOW_API_CALLS=1
          export CL2_SCHEDULER_THROUGHPUT_THRESHOLD=0

          # CL2 needs ssh access to control plane nodes
          gcloud compute config-ssh

          go run ./cmd/clusterloader.go \
            -v=2 \
            --testconfig=./testing/load/config.yaml \
            --provider=gce \
            --enable-prometheus-server \
            --tear-down-prometheus-server=false \
            --nodes=100 \
            --report-dir=./report \
            --experimental-prometheus-snapshot-to-report-dir=true \
            --kubeconfig=$HOME/.kube/config \
            --testoverrides=./testing/overrides/load_throughput.yaml \
            --testoverrides=./testing/experiments/use_simple_latency_query.yaml \
            --testoverrides=./testing/prometheus/not-scrape-kube-proxy.yaml \
            2>&1 | tee cl2-output.txt

      - name: Get sysdump
        if: ${{ always() && steps.run-cl2.outcome != 'skipped' && steps.run-cl2.outcome != 'cancelled' }}
        run: |
          cilium status
          cilium sysdump --output-filename cilium-sysdump-final

      - name: Cleanup cluster
        if: ${{ always() && steps.deploy-cluster.outcome != 'skipped' }}
        uses: cilium/scale-tests-action/cleanup-cluster@238d773bd07754bfd693a6b22c94eddf3a12778d # main
        with:
          cluster_name: ${{ steps.vars.outputs.cluster_name }}
          kops_state: ${{ secrets.GCP_PERF_KOPS_STATE_STORE }}

      - name: Export results and sysdump to GS bucket
        if: ${{ always() && steps.run-cl2.outcome != 'skipped' && steps.run-cl2.outcome != 'cancelled' }}
        uses: cilium/scale-tests-action/export-results@238d773bd07754bfd693a6b22c94eddf3a12778d # main
        with:
          test_name: ${{ env.test_name }}
          results_bucket: ${{ env.GCP_PERF_RESULTS_BUCKET }}
          artifacts: ./perf-tests/clusterloader2/report/*
          other_files: cilium-sysdump-final.zip ./perf-tests/clusterloader2/cl2-output.txt
